# Load the raw session records from the CSV file in the working directory.
sessions_data <- read.csv(file = "sessions_data.csv")
#' Estimate the mode of a numeric sample from a kernel density estimate.
#'
#' The density is evaluated with `stats::density()` on the interval
#' `[min(v) - 1, max(v) + 1]` and the grid point with the highest estimated
#' density is returned.
#'
#' @param v A non-empty numeric vector. Missing values are not removed, so
#'   `density()` will raise an error if any are present.
#' @param bw Bandwidth passed to `density()`. Defaults to `0.2`, the value
#'   that was previously hard-coded.
#' @return A single numeric value: the grid location of the density maximum.
#'   If several grid points share the maximum, the first one is returned.
getmode <- function(v, bw = 0.2) {
  # Fail early with a clear message instead of a warning from min()/max()
  # followed by an obscure error inside density().
  stopifnot(is.numeric(v), length(v) > 0L)

  lim_inf <- min(v) - 1
  lim_sup <- max(v) + 1

  dens <- density(v, from = lim_inf, to = lim_sup, bw = bw)

  # which.max() avoids comparing floating-point densities with `==` and
  # guarantees a scalar result even when the maximum is tied.
  dens$x[which.max(dens$y)]
}
#Data Visualization
# Histogram of the `duration` column for the rows whose `username` is 103,
# converted to an interactive plotly widget.
userSessions <- sessions_data
p <- userSessions %>%
  filter(username == 103) %>%
  select(duration) %>%
  ggplot(aes(duration)) +
  # bins = 30 is the value stat_bin() falls back to anyway; stating it
  # explicitly keeps the plot unchanged and silences the
  # "Pick better value with `binwidth`" message.
  geom_histogram(bins = 30)
ggplotly(p)
`stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
NA
For each user's session durations: 1. Calculate the p-value. 2. Estimate the rate parameter.
# library(EnvStats)
# library(exptest)
#
#
# usersIds <- userSessions %>% group_by(username) %>%
# summarize(n = n()) %>%
# filter(n>100) %>% select(username) %>% slice_head(n=4500)
#
# results = NULL
#
# for(id in usersIds$username) {
#
# print(id)
# print(Sys.time())
# print("-------------")
# signleUserData <- userSessions %>% filter(username==id)%>%select(duration)
#
#
#
# data_with_contamination <- signleUserData$duration
#
# # Code to handle outliers
# q1 <- quantile(data_with_contamination , c(.25))[["25%"]]
# q3 <- quantile(data_with_contamination , c(.75))[["75%"]]
# IQR <- q3-q1
# lower = q1 - 1.5*IQR
# upper = q3 + 1.5*IQR
#
#
# outliers <- boxplot(data_with_contamination, plot=FALSE)$out
# outliersPos <- which(data_with_contamination %in% outliers)
# data_AfterDelete_Outliers <- data_with_contamination[-outliersPos]
#
# outliersPro = length(outliers)/length(data_with_contamination)
#
#
# dataAfterHandling_Q_b_F_C <- data_with_contamination
# dataAfterHandling_mean <- data_with_contamination
# dataAfterHandling_median <- data_with_contamination
# dataAfterHandling_mode <- data_with_contamination
#
#
# dataAfterHandling_Q_b_F_C[dataAfterHandling_Q_b_F_C<lower] <- (lower)
# dataAfterHandling_Q_b_F_C[dataAfterHandling_Q_b_F_C>upper] <- (upper)
# estimated_rate_After_Q_b_F_C <- eexp(dataAfterHandling_Q_b_F_C, ci=TRUE, conf = 0.95)$parameters[["rate"]]
#
#
# mean = mean(data_AfterDelete_Outliers)
# dataAfterHandling_mean[outliersPos] <- (mean)
# estimated_rate_After_mean <- eexp(dataAfterHandling_mean, ci=TRUE, conf = 0.95)$parameters[["rate"]]
#
#
# median = median(data_AfterDelete_Outliers)
# dataAfterHandling_median[outliersPos] <- (median)
# estimated_rate_After_median <- eexp(dataAfterHandling_median, ci=TRUE, conf = 0.95)$parameters[["rate"]]
#
#
# # mode = getmode(data_AfterDelete_Outliers)
# # dataAfterHandling_mode[outliersPos] <- (mode)
# # estimated_rate_After_mode <- eexp(dataAfterHandling_mode, ci=TRUE, conf = 0.95)$parameters[["rate"]]
#
# estimated_rate_Before_Handling <- eexp(data_with_contamination, ci=TRUE, conf = 0.95)$parameters[["rate"]]
# #
#
# Q_b_F_C_P_value <-shapiro.exp.test(dataAfterHandling_Q_b_F_C )$p.value
# mean_P_value <-shapiro.exp.test(dataAfterHandling_mean )$p.value
# median_P_value <-shapiro.exp.test(dataAfterHandling_median )$p.value
# # mode_P_value <-shapiro.exp.test(dataAfterHandling_mode)$p.value
# Before_P_value <- shapiro.exp.test(data_with_contamination )$p.value
#
# results = rbind(
# results,
# data.frame(
# id,
# outliersPro,
# estimated_rate_Before_Handling,
# estimated_rate_After_Q_b_F_C,
# estimated_rate_After_mean,
# estimated_rate_After_median,
# # estimated_rate_After_mode ,
# Before_P_value,
# Q_b_F_C_P_value,
# mean_P_value,
# median_P_value#,
# #mode_P_value
# ))
# }
#
# ```
#
#
#
# ```{r}
# above_05 <- function(pValueList){
# percent <- mean(pValueList>.05)
# return (percent)
# }
# Load the previously saved per-user results table from the working directory.
results <- read.csv(file = "results.csv")
#Visualization Results
# Histograms of the four `estimated_rate_*` columns of `results`, one facet
# per column. The columns are stacked into long form (`Method` holds the
# original column name, `estimatedRate` the value) so that ggplot can facet
# on `Method`.
results %>%
  select(
    estimated_rate_Before_Handling,
    estimated_rate_After_Q_b_F_C,
    estimated_rate_After_mean,
    estimated_rate_After_median
  ) %>%
  # pivot_longer() replaces the superseded gather(); row order differs but
  # the histograms only depend on the values within each Method.
  pivot_longer(
    cols = everything(),
    names_to = "Method",
    values_to = "estimatedRate"
  ) %>%
  ggplot(aes(x = estimatedRate)) +
  # Explicit bins = 30 matches the stat_bin() default and avoids its message.
  geom_histogram(aes(colour = Method), bins = 30) +
  facet_wrap(~Method)
# Histograms of the four `*_P_value` columns of `results`, one facet per
# column. The only x-axis break is drawn at 0.05.
results %>%
  select(
    Before_P_value,
    mean_P_value,
    median_P_value,
    Q_b_F_C_P_value
  ) %>%
  # pivot_longer() replaces the superseded gather(); row order differs but
  # the histograms only depend on the values within each Method.
  pivot_longer(
    cols = everything(),
    names_to = "Method",
    values_to = "pValue"
  ) %>%
  ggplot(aes(x = pValue)) +
  # Explicit bins = 30 matches the stat_bin() default and avoids its message.
  geom_histogram(bins = 30) +
  scale_x_continuous(breaks = c(0.05)) +
  facet_wrap(~Method)
`stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

LS0tDQp0aXRsZTogIkFwcGxpY2F0aW9uIEV4cG9uZW50aWFsIERpc3RyaWJ1dGlvbiINCm91dHB1dDogaHRtbF9ub3RlYm9vaw0KLS0tDQogDQpgYGB7cn0NCg0KIyBzZXNzaW9uc19kYXRhIDwtIHJlYWQuY3N2KCJzZXNzaW9uc19kYXRhLmNzdiIgKQ0KDQpgYGANCg0KYGBge3IgZXJyb3I9RkFMU0UgLCB3YXJuaW5nPUZBTFNFLGVjaG89RkFMU0V9DQogbGlicmFyeSh0aWR5dmVyc2UpICANCiBsaWJyYXJ5KGx1YnJpZGF0ZSkNCiBsaWJyYXJ5KHBsb3RseSkNCiBsaWJyYXJ5KGhyYnJ0aGVtZXMpDQogDQoNCg0KYGBgDQoNCmBgYHtyfQ0KZ2V0bW9kZSA8LSBmdW5jdGlvbih2KSB7DQogIA0KICB4PC12DQogIGxpbS5pbmY9bWluKHgpLTE7IGxpbS5zdXA9bWF4KHgpKzENCiAgDQogICMgaGlzdCh4LGZyZXE9RkFMU0UsYnJlYWtzPXNlcShsaW0uaW5mLGxpbS5zdXAsMC4yKSkNCiAgczwtZGVuc2l0eSh4LGZyb209bGltLmluZix0bz1saW0uc3VwLGJ3PTAuMikNCiAgbjwtbGVuZ3RoKHMkeSkNCiAgdjE8LXMkeVsxOihuLTIpXTsNCiAgdjI8LXMkeVsyOihuLTEpXTsNCiAgdjM8LXMkeVszOm5dDQogIGl4PC0xK3doaWNoKCh2MTx2MikmKHYyPnYzKSkNCiAgDQogICNsaW5lcyhzJHgscyR5LGNvbD0icmVkIikNCiAgI3BvaW50cyhzJHhbaXhdLHMkeVtpeF0sY29sPSJibHVlIikNCiAgDQogIG1kIDwtIHMkeFt3aGljaChzJHk9PW1heChzJHkpKV0gDQogIA0KICBtZA0KfQ0KDQpgYGANCg0KDQojRGF0YSBWaXN1YWxpemF0aW9uDQoNCmBgYHtyfQ0KdXNlclNlc3Npb25zIDwtIHNlc3Npb25zX2RhdGENCiBwIDwtdXNlclNlc3Npb25zICU+JSBmaWx0ZXIodXNlcm5hbWUgPT0xMDMpJT4lc2VsZWN0KGR1cmF0aW9uKSAgJT4lIGdncGxvdChhZXMoZHVyYXRpb24pKSArIA0KICAgZ2VvbV9oaXN0b2dyYW0oKSANCiAgIGdncGxvdGx5KHApDQogICANCmBgYA0KDQpmb3IgZWFjaCB1c2VyIHNlc3Npb25zLWR1cmF0aW9uIA0KMS4gQ2FsY3VsYXRlIHAtdmFsdWUNCjIuIGVzdGltYXRlIHJhdGUgcGFyYW1ldGVyIA0KDQpgYGB7cn0NCiMgbGlicmFyeShFbnZTdGF0cykNCiMgbGlicmFyeShleHB0ZXN0KQ0KIyANCiMgDQojIHVzZXJzSWRzIDwtIHVzZXJTZXNzaW9ucyAlPiUgZ3JvdXBfYnkodXNlcm5hbWUpICU+JQ0KIyAgIHN1bW1hcml6ZShuID0gbigpKSAlPiUgDQojICAgZmlsdGVyKG4+MTAwKSAlPiUgc2VsZWN0KHVzZXJuYW1lKSAlPiUgc2xpY2VfaGVhZChuPTQ1MDApDQojIA0KIyAgICByZXN1bHRzID0gTlVMTA0KIyAgIA0KIyBmb3IoaWQgaW4gdXNlcnNJZHMkdXNlcm5hbWUpIHsNCiMgDQojICAgICBwcmludChpZCkNCiMgICAgIHByaW50KFN5cy50aW1lKCkpDQojICAgICAgcHJpbnQoIi0tLS0tLS0tLS0tLS0iKQ0KIyAgIHNpZ25sZVVzZXJEYXRhIDwtIHVzZXJTZXNzaW9ucyAlPiUgZmlsdGVyKHVzZXJuYW1lPT1pZCklPiVzZWxlY3QoZHVyYXRpb24pDQojICAgDQojICANCiMgDQojICBkYXRhX3dpdGhfY29udGFtaW5hdGlvbiA8LSAg
c2lnbmxlVXNlckRhdGEkZHVyYXRpb24NCiMgICAgICANCiMgICAgICNDb2RlIFRvIEhhbmRsaW5nIG91dGxpZXIgDQojICAgICBxMSA8LSBxdWFudGlsZShkYXRhX3dpdGhfY29udGFtaW5hdGlvbiAgLCBjKC4yNSkpW1siMjUlIl1dDQojICAgICBxMyA8LSBxdWFudGlsZShkYXRhX3dpdGhfY29udGFtaW5hdGlvbiAgLCBjKC43NSkpW1siNzUlIl1dDQojICAgICBJUVIgPC0gcTMtcTENCiMgICAgIGxvd2VyICA9IHExIC0gMS41KklRUg0KIyAgICAgdXBwZXIgPSBxMyArIDEuNSpJUVINCiMgICAgIA0KIyAgICAgDQojICAgICBvdXRsaWVycyA8LSBib3hwbG90KGRhdGFfd2l0aF9jb250YW1pbmF0aW9uLCBwbG90PUZBTFNFKSRvdXQNCiMgICAgIG91dGxpZXJzUG9zIDwtIHdoaWNoKGRhdGFfd2l0aF9jb250YW1pbmF0aW9uICVpbiUgb3V0bGllcnMpDQojICAgICBkYXRhX0FmdGVyRGVsZXRlX091dGxpZXJzIDwtICBkYXRhX3dpdGhfY29udGFtaW5hdGlvblstb3V0bGllcnNQb3NdDQojICAgICANCiMgICAgIG91dGxpZXJzUHJvID0gbGVuZ3RoKG91dGxpZXJzKS9sZW5ndGgoZGF0YV93aXRoX2NvbnRhbWluYXRpb24pDQojICAgICANCiMgICAgIA0KIyAgICAgZGF0YUFmdGVySGFuZGxpbmdfUV9iX0ZfQyA8LSBkYXRhX3dpdGhfY29udGFtaW5hdGlvbg0KIyAgICAgZGF0YUFmdGVySGFuZGxpbmdfbWVhbiA8LSBkYXRhX3dpdGhfY29udGFtaW5hdGlvbg0KIyAgICAgZGF0YUFmdGVySGFuZGxpbmdfbWVkaWFuIDwtIGRhdGFfd2l0aF9jb250YW1pbmF0aW9uDQojICAgICBkYXRhQWZ0ZXJIYW5kbGluZ19tb2RlIDwtIGRhdGFfd2l0aF9jb250YW1pbmF0aW9uDQojICAgICANCiMgICAgIA0KIyAgICAgZGF0YUFmdGVySGFuZGxpbmdfUV9iX0ZfQ1tkYXRhQWZ0ZXJIYW5kbGluZ19RX2JfRl9DPGxvd2VyXSA8LSAgKGxvd2VyKQ0KIyAgICAgZGF0YUFmdGVySGFuZGxpbmdfUV9iX0ZfQ1tkYXRhQWZ0ZXJIYW5kbGluZ19RX2JfRl9DPnVwcGVyXSA8LSAgKHVwcGVyKQ0KIyAgICAgZXN0aW1hdGVkX3JhdGVfQWZ0ZXJfUV9iX0ZfQyA8LSBlZXhwKGRhdGFBZnRlckhhbmRsaW5nX1FfYl9GX0MsIGNpPVRSVUUsIGNvbmYgPSAwLjk1KSRwYXJhbWV0ZXJzW1sicmF0ZSJdXQ0KIyAgICAgDQojICAgICANCiMgICAgIG1lYW4gPSBtZWFuKGRhdGFfQWZ0ZXJEZWxldGVfT3V0bGllcnMpDQojICAgICBkYXRhQWZ0ZXJIYW5kbGluZ19tZWFuW291dGxpZXJzUG9zXSA8LSAgKG1lYW4pDQojICAgICBlc3RpbWF0ZWRfcmF0ZV9BZnRlcl9tZWFuIDwtICBlZXhwKGRhdGFBZnRlckhhbmRsaW5nX21lYW4sIGNpPVRSVUUsIGNvbmYgPSAwLjk1KSRwYXJhbWV0ZXJzW1sicmF0ZSJdXQ0KIyAgICAgDQojICAgICANCiMgICAgIG1lZGlhbiA9IG1lZGlhbihkYXRhX0FmdGVyRGVsZXRlX091dGxpZXJzKQ0KIyAgICAgZGF0YUFmdGVySGFuZGxpbmdfbWVkaWFuW291dGxpZXJzUG9zXSA8LSAgKG1lZGlhbikNCiMgICAgIGVzdGltYXRlZF9yYXRlX0FmdGVyX21lZGlhbiA8
LSAgZWV4cChkYXRhQWZ0ZXJIYW5kbGluZ19tZWRpYW4sIGNpPVRSVUUsIGNvbmYgPSAwLjk1KSRwYXJhbWV0ZXJzW1sicmF0ZSJdXQ0KIyAgICAgDQojICAgICANCiMgICAgICMgbW9kZSA9IGdldG1vZGUoZGF0YV9BZnRlckRlbGV0ZV9PdXRsaWVycykNCiMgICAgICMgZGF0YUFmdGVySGFuZGxpbmdfbW9kZVtvdXRsaWVyc1Bvc10gPC0gIChtb2RlKSANCiMgICAgICMgZXN0aW1hdGVkX3JhdGVfQWZ0ZXJfbW9kZSA8LSBlZXhwKGRhdGFBZnRlckhhbmRsaW5nX21vZGUsIGNpPVRSVUUsIGNvbmYgPSAwLjk1KSRwYXJhbWV0ZXJzW1sicmF0ZSJdXQ0KIyAgICAgDQojICAgIGVzdGltYXRlZF9yYXRlX0JlZm9yZV9IYW5kbGluZyA8LSBlZXhwKGRhdGFfd2l0aF9jb250YW1pbmF0aW9uLCBjaT1UUlVFLCBjb25mID0gMC45NSkkcGFyYW1ldGVyc1tbInJhdGUiXV0NCiMgICAgICMgDQojICAgICANCiMgICAgIFFfYl9GX0NfUF92YWx1ZSA8LXNoYXBpcm8uZXhwLnRlc3QoZGF0YUFmdGVySGFuZGxpbmdfUV9iX0ZfQyAgKSRwLnZhbHVlDQojICAgICBtZWFuX1BfdmFsdWUgPC1zaGFwaXJvLmV4cC50ZXN0KGRhdGFBZnRlckhhbmRsaW5nX21lYW4gKSRwLnZhbHVlDQojICAgICBtZWRpYW5fUF92YWx1ZSA8LXNoYXBpcm8uZXhwLnRlc3QoZGF0YUFmdGVySGFuZGxpbmdfbWVkaWFuICkkcC52YWx1ZQ0KIyAgICAgIyBtb2RlX1BfdmFsdWUgPC1zaGFwaXJvLmV4cC50ZXN0KGRhdGFBZnRlckhhbmRsaW5nX21vZGUpJHAudmFsdWUNCiMgICAgIEJlZm9yZV9QX3ZhbHVlIDwtIHNoYXBpcm8uZXhwLnRlc3QoZGF0YV93aXRoX2NvbnRhbWluYXRpb24gICkkcC52YWx1ZQ0KIyAgICAgICANCiMgICAgICAgIHJlc3VsdHMgPSByYmluZCgNCiMgICAgICAgcmVzdWx0cywNCiMgICAgICAgZGF0YS5mcmFtZSgNCiMgICAgICAgICBpZCwNCiMgICAgICAgICBvdXRsaWVyc1BybywNCiMgICAgICAgICBlc3RpbWF0ZWRfcmF0ZV9CZWZvcmVfSGFuZGxpbmcsDQojICAgICAgICAgZXN0aW1hdGVkX3JhdGVfQWZ0ZXJfUV9iX0ZfQywNCiMgICAgICAgICBlc3RpbWF0ZWRfcmF0ZV9BZnRlcl9tZWFuLA0KIyAgICAgICAgIGVzdGltYXRlZF9yYXRlX0FmdGVyX21lZGlhbiwNCiMgICAgICAgICMgZXN0aW1hdGVkX3JhdGVfQWZ0ZXJfbW9kZSAsDQojICAgICAgICAgQmVmb3JlX1BfdmFsdWUsDQojICAgICAgICAgUV9iX0ZfQ19QX3ZhbHVlLA0KIyAgICAgICAgIG1lYW5fUF92YWx1ZSwNCiMgICAgICAgICBtZWRpYW5fUF92YWx1ZSMsDQojICAgICAgICAgI21vZGVfUF92YWx1ZQ0KIyAgICAgICApKQ0KIyAgIH0NCiMgICANCiMgYGBgDQojIA0KIyANCiMgDQojIGBgYHtyfQ0KIyBhYm92ZV8wNSA8LSBmdW5jdGlvbihwVmFsdWVMaXN0KXsNCiMgICBwZXJjZW50IDwtIG1lYW4ocFZhbHVlTGlzdD4uMDUpDQojICAgcmV0dXJuIChwZXJjZW50KQ0KIyB9DQoNCmBgYA0KDQpgYGB7cn0NCnJlc3VsdHMgPC0gcmVhZC5jc3YoInJlc3VsdHMuY3N2IikNCmBgYA0KDQoNCiNWaXN1
YWxpemF0aW9uIFJlc3VsdHMgDQpgYGB7cn0NCnJlc3VsdHMgJT4lICAgc2VsZWN0KGVzdGltYXRlZF9yYXRlX0JlZm9yZV9IYW5kbGluZyAsIA0KICAgICAgICAgICAgICAgICAgICAgZXN0aW1hdGVkX3JhdGVfQWZ0ZXJfUV9iX0ZfQyAsIA0KICAgICAgICAgICAgICAgICAgICAgZXN0aW1hdGVkX3JhdGVfQWZ0ZXJfbWVhbiAsIA0KICAgICAgICAgICAgICAgICAgICAgZXN0aW1hdGVkX3JhdGVfQWZ0ZXJfbWVkaWFuIA0KICAgICAgICAgICAgICAgICAgICAgKSAlPiUgDQogIGdhdGhlcigiTWV0aG9kIiAsICJlc3RpbWF0ZWRSYXRlIiAsIGVzdGltYXRlZF9yYXRlX0JlZm9yZV9IYW5kbGluZyAsIGVzdGltYXRlZF9yYXRlX0FmdGVyX1FfYl9GX0MgLCBlc3RpbWF0ZWRfcmF0ZV9BZnRlcl9tZWFuICxlc3RpbWF0ZWRfcmF0ZV9BZnRlcl9tZWRpYW4gICkgJT4lIA0KICBnZ3Bsb3QoYWVzKHggPSAoZXN0aW1hdGVkUmF0ZSkpKSArIA0KICAgICBnZW9tX2hpc3RvZ3JhbSggYWVzKGNvbG91ciA9IGFzLmZhY3RvcihNZXRob2QpKSkgKyANCg0KICBmYWNldF93cmFwKC5+TWV0aG9kKQ0KDQoNCmBgYA0KDQoNCmBgYHtyfQ0KDQpyZXN1bHRzICU+JSAgIHNlbGVjdChCZWZvcmVfUF92YWx1ZSAsIA0KICAgICAgICAgICAgICAgICAgICAgbWVhbl9QX3ZhbHVlICwgDQogICAgICAgICAgICAgICAgICAgICBtZWRpYW5fUF92YWx1ZSAsIA0KICAgICAgICAgICAgICAgICAgICAgUV9iX0ZfQ19QX3ZhbHVlIA0KICAgICAgICAgICAgICAgICAgICAgKSAlPiUgDQogIGdhdGhlcigiTWV0aG9kIiAsICJwVmFsdWUiICwgQmVmb3JlX1BfdmFsdWUgLCBtZWFuX1BfdmFsdWUgLCBtZWRpYW5fUF92YWx1ZSAsUV9iX0ZfQ19QX3ZhbHVlICApICU+JSANCiAgZ2dwbG90KGFlcyh4ID0gKHBWYWx1ZSkpKSArIA0KICAgICBnZW9tX2hpc3RvZ3JhbSggKSArIA0KIHNjYWxlX3hfY29udGludW91cyhicmVha3MgPSBjKDAuMDUpKSArDQoNCiAgZmFjZXRfd3JhcCgufk1ldGhvZCkNCmBgYA0KDQoNCg==